################################################################################
### Geographical Representation on the Floor:
### Parliamentary Rules and Legislative Speeches in Party‐Centered Contexts
### Author: Edoardo Alberto Viganò

### Appendix
#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# List of required packages
required_pkgs <- c(
  "here", "tidyverse", "jtools", "lme4", "plm",
  "survival", "fixest", "stargazer", "MASS",
  "marginaleffects", "ggplot2", "vtable", "texreg"
)

# Install any that are missing
to_install <- required_pkgs[!(required_pkgs %in% installed.packages()[,"Package"])]
if(length(to_install)) install.packages(to_install, dependencies = TRUE)

# Load all
invisible(lapply(required_pkgs, library, character.only = TRUE))

### Load data
# Each dataset is an .rds file whose path is resolved with here().
# NOTE(review): `speeches_sw` is used further down (Tables B.1, C.2, D.1, D.3)
# but is not loaded in the visible part of this script -- confirm which file
# provides it and add the corresponding readRDS() call.
speeches       <- here("speeches_desc.rds") |> readRDS()
speeches_MP_17 <- here("speeches_MP_17_desc.rds") |> readRDS()
speeches_MP_18 <- here("speeches_MP_18_desc.rds") |> readRDS()
speeches_pt    <- here("speeches_main.rds") |> readRDS()
speeches_mp    <- here("speeches_mplevel.rds") |> readRDS()
speeches_compl <- here("speeches_compl.rds") |> readRDS()

### TABLE B.1
# Summary statistics for the listed covariates, split by `conversion`, with
# three digits, shown in the viewer; the between-group test is switched off.
st(
  speeches_sw,
  vars = c(
    "age", "male", "ideol_dist", "loyalty", "terms", "office", "gov_office",
    "gov_party", "rapporteur"
  ),
  group = "conversion",
  group.test = FALSE,
  digits = 3,
  out = "viewer"
)

### FIGURE B.1
# Scatter plot of speech counts in bill debates (`count_sb`) against speech
# counts in conversion debates (`count_sc`), with a dashed linear fit and the
# correlation between the two counts printed in the top-right corner.
summary_1718 <- bind_rows(speeches_MP_17, speeches_MP_18)

ggplot(summary_1718, aes(x = count_sb, y = count_sc)) +
  geom_point(alpha = 0.6) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0); using
  # `size` on a line triggers a deprecation warning.
  geom_smooth(
    method = "lm", se = FALSE,
    color = "darkgrey", linetype = "dashed", linewidth = 1
  ) +
  annotate(
    "text",
    # Inf/Inf anchors the label to the top-right corner of the panel.
    x = Inf, y = Inf,
    hjust = 1.1, vjust = 1.5,
    label = paste(
      "Correlation:",
      # Rows with a missing count are dropped before computing the correlation.
      round(
        cor(summary_1718$count_sb, summary_1718$count_sc, use = "complete.obs"),
        2
      )
    ),
    size = 4, color = "black", family = "Roboto Condensed"
  ) +
  labs(
    x = "Number of speeches in bill debates",
    y = "Number of speeches in conversion debates"
  ) +
  theme_classic()


### FIGURE B.2
# Horizontal bar chart of the number of speeches per policy topic (`cap`),
# with bars filled by bill type (`conversion`).
cap_plot <- speeches %>%
  mutate(cap = as.factor(cap))

graph1 <- cap_plot |>
  # Order topics by frequency; fct_rev() puts the most frequent at the top.
  mutate(cap = fct_infreq(cap) |> fct_rev()) |>
  ggplot(aes(y = cap, fill = as_factor(conversion))) +
  geom_bar() +
  labs(
    # NULL is the supported way to drop an axis title in labs();
    # element_blank() is a theme element, not a label.
    y = NULL,
    x = "Number of speeches",
    fill = "Bill"
  ) +
  theme_minimal(base_family = "Roboto Condensed", base_size = 14) +
  scale_fill_discrete(labels = c("Ordinary", "Conversion"))

# Topic labels are matched to `cap` codes by name instead of by position, so
# they stay correct if the frequency ordering of topics changes.
# NOTE(review): assumes `cap` in `speeches` uses the same numeric codes as the
# "as.factor(cap)<code>" terms recoded for FIGURE B.3, with 1 as the remaining
# (reference) category -- confirm. Codes without a match keep their raw value.
(graph2 <- graph1 +
    scale_y_discrete(labels = c(
      "1" = "Macroeconomics",
      "2" = "Civil rights",
      "3" = "Health",
      "4" = "Agriculture",
      "5" = "Labor",
      "6" = "Education",
      "7" = "Environment",
      "8" = "Energy",
      "9" = "Immigration",
      "10" = "Transportation",
      "12" = "Law and crime",
      "13" = "Social welfare",
      "14" = "Housing and territorial policies",
      "15" = "Domestic commerce",
      "16" = "Defense",
      "17" = "Technology",
      "18" = "Foreign trade",
      "19" = "International affairs",
      "20" = "Government operations",
      "21" = "Public lands",
      "23" = "Culture"
    ))
)

### FIGURE B.3
# Coefficient plot of the conditional logit of `reg_sp` with year and topic
# dummies, stratified by `deputato`.
glm1_2 <- clogit(reg_sp ~ conversion + nwords + rapporteur + gov_party + as.factor(year) +
                   as.factor(cap)  + strata(deputato), speeches_pt)

# plot_summs() draws an additional inner interval at the 90% level.
graph <- plot_summs(glm1_2, inner_ci_level = 0.9)

# Replace raw coefficient names with readable labels on the plot axis.
# `gov_party` is part of the model, so it gets a label too; without it the
# raw variable name would be shown on the axis.
graph$data$term <- fct_recode(graph$data$term,
                              "Civil rights" = "as.factor(cap)2",
                              "Health" = "as.factor(cap)3",
                              "Agriculture" = "as.factor(cap)4",
                              "Labor" = "as.factor(cap)5",
                              "Education" = "as.factor(cap)6",
                              "Environment" = "as.factor(cap)7",
                              "Energy" = "as.factor(cap)8",
                              "Immigration" = "as.factor(cap)9",
                              "Transportation" = "as.factor(cap)10",
                              "Law and crime" = "as.factor(cap)12",
                              "Social welfare" = "as.factor(cap)13",
                              "Housing and territorial policies" = "as.factor(cap)14",
                              "Domestic commerce" = "as.factor(cap)15",
                              "Defense" = "as.factor(cap)16",
                              "Technology" = "as.factor(cap)17",
                              "Foreign trade" = "as.factor(cap)18",
                              "International affairs" = "as.factor(cap)19",
                              "Government operations" = "as.factor(cap)20",
                              "Public lands" = "as.factor(cap)21",
                              "Culture" = "as.factor(cap)23",
                              "Open access" = "conversion",
                              "Speech length" = "nwords",
                              "Rapporteur" = "rapporteur",
                              "Government party" = "gov_party",
                              "2014" = "as.factor(year)2014",
                              "2015" = "as.factor(year)2015",
                              "2016" = "as.factor(year)2016",
                              "2017" = "as.factor(year)2017",
                              "2018" = "as.factor(year)2018",
                              "2019" = "as.factor(year)2019",
                              "2020" = "as.factor(year)2020",
                              "2021" = "as.factor(year)2021"
)

graph +
  xlab("Coefficient estimate") + ylab("") +
  theme_classic()


### TABLE C.1
# Build the interacted fixed-effect identifiers used by the models below.
speeches_pt <- speeches_pt |>
  mutate(
    year_cap = interaction(year, cap, drop = TRUE),
    mp_year = interaction(deputato, year, drop = TRUE),
    mp_cap = interaction(deputato, cap, drop = TRUE),
    # `term` is the two digits that follow the last underscore at the end of
    # `deputato`; values that do not match the pattern are left unchanged.
    term = sub(".*_(\\d{2})$", "\\1", deputato),
    mp_term = interaction(deputato, term, drop = TRUE),
    party_term = interaction(party, term, drop = TRUE)
  ) |>
  # Keep `term` right next to the identifier it is derived from.
  relocate(term, .after = deputato)


# Logit models of `reg_sp` on `conversion` under alternative fixed-effect
# structures (the part of each formula after `|`).

# MP FE only
glm1_alt <- feglm(
  fml = reg_sp ~ conversion | deputato,
  data = speeches_pt,
  family = binomial("logit")
)

# MP, year and topic FE, with controls
glm2_alt <- feglm(
  fml = reg_sp ~ conversion + nwords + rapporteur + gov_party |
    deputato + year + cap,
  data = speeches_pt,
  family = binomial("logit")
)

# Month-year FE
glm3_alt <- feglm(
  fml = reg_sp ~ conversion + nwords + rapporteur + gov_party |
    deputato + month_year + cap,
  data = speeches_pt,
  family = binomial("logit")
)

# Year-topic FE
glm4_alt <- feglm(
  fml = reg_sp ~ conversion | deputato + year_cap,
  data = speeches_pt,
  family = binomial("logit")
)

# MP-year FE
glm5_alt <- feglm(
  fml = reg_sp ~ conversion | mp_year,
  data = speeches_pt,
  family = binomial("logit")
)

# MP-cap FE
glm6_alt <- feglm(
  fml = reg_sp ~ conversion | mp_cap,
  data = speeches_pt,
  family = binomial("logit")
)

# MP-term FE
glm7_alt <- feglm(
  fml = reg_sp ~ conversion | mp_term,
  data = speeches_pt,
  family = binomial("logit")
)

# Party-term FE
glm8_alt <- feglm(
  fml = reg_sp ~ conversion | party_term,
  data = speeches_pt,
  family = binomial("logit")
)

# Only models 3, 4, 5, 6 and 8 enter the printed table; glm1_alt, glm2_alt
# and glm7_alt are fitted above but not reported here.
etable(
  list(glm3_alt, glm4_alt, glm5_alt, glm6_alt, glm8_alt),
  dict = c(
    conversion = "Open-access",
    nwords = "Speech length",
    rapporteur = "Rapporteur",
    gov_party = "Government party"
  ),
  title = "Floor rules and geographical speeches, conditional logistic regression",
  tex = FALSE
)


### TABLE C.2
# Mixed-effects logit of `reg_sp` with a random intercept and a random
# `conversion` slope per `deputato`, plus topic dummies.
multiglm1 <- glmer(
  reg_sp ~ conversion + nwords + age + male + terms + rapporteur +
    gov_office + office + gov_party + local + south + maj_tier +
    as.factor(cap) + (1 + conversion | deputato),
  data = speeches_sw,
  family = binomial
)

# Topic dummies and the intercept are left out of the printed table; the
# covariate labels follow the order of the fixed-effect terms above.
stargazer(
  multiglm1,
  type = "text",
  style = "ajps",
  single.row = FALSE,
  title = "Floor rules and geographical speeches, random effects model",
  dep.var.labels = "Geographical speech",
  omit = c("cap", "Constant"),
  covariate.labels = c(
    "Open-access", "Speech length", "Age", "Male", "Tenure",
    "Rapporteur", "Gov. office", "Leg. office", "Gov. party",
    "Local", "South", "Majoritarian tier"
  )
)

### TABLE C.3
# Negative binomial mixed models of `match_count` with a random intercept per
# `deputato`: first without controls, then with controls and year/topic dummies.
# NOTE(review): both models are fitted but no table is printed in this
# section -- confirm whether an output call is missing.
nb1 <- glmer.nb(
  match_count ~ conversion + (1 | deputato),
  data = speeches_pt
)

nb2 <- glmer.nb(
  match_count ~ conversion + nwords + rapporteur + gov_party +
    as.factor(year) + as.factor(cap) + (1 | deputato),
  data = speeches_pt
)




### TABLE C.4
# Negative binomial model of `speech_geo` on the MP-level data.
negbin <- glm.nb(
  speech_geo ~ conversion + age + male + terms + gov_office + office +
    local + south + maj_tier + days_office,
  data = speeches_mp
)

# Render the model as a LaTeX table.
texreg(negbin, override.se = list(NULL))


### TABLE C.5
# Linear model of `geo_share` on the MP-level data.
share <- lm(
  geo_share ~ conversion + age + male + terms + gov_office + office +
    local + south + maj_tier,
  data = speeches_mp
)

# LaTeX table without the intercept; labels follow the order of the terms above.
stargazer(
  share,
  type = "latex",
  style = "ajps",
  single.row = FALSE,
  title = "Floor rules and share of geographical speeches, MP-level",
  dep.var.labels = "Geographical speech",
  omit = "Constant",
  covariate.labels = c(
    "Open-access", "Age", "Male", "Tenure", "Gov. office",
    "Leg. office", "Local", "South", "Majoritarian tier"
  )
)



### TABLE D.1
# Logit models of `reg_sp` on the interaction of `conversion` with `loyalty`,
# first alone and then with controls plus `leg_term` and topic dummies.
glm2_1 <- feglm(
  fml = reg_sp ~ conversion * loyalty,
  data = speeches_sw,
  family = binomial(link = "logit")
)

glm2_2 <- feglm(
  fml = reg_sp ~ conversion * loyalty + nwords + rapporteur +
    gov_office + office + gov_party + local + south +
    male + age + as.factor(leg_term) + as.factor(cap),
  data = speeches_sw,
  family = binomial(link = "logit")
)

# Topic dummies are dropped from the printed table.
etable(glm2_1, glm2_2, drop = "cap", tex = FALSE)


### TABLE D.2
# Models of `reg_sp` on the interaction of `conversion` with `distr_topic`,
# with `deputato` fixed effects and standard errors clustered by `deputato`.
# NOTE(review): no `family` is passed here, so feglm() falls back to its
# default family, unlike the other `reg_sp` models in this script, which
# request a binomial logit -- confirm this is intended.
glm3_1 <- feglm(
  fml = reg_sp ~ conversion * as.factor(distr_topic) | deputato,
  data = speeches,
  cluster = "deputato"
)

glm3_2 <- feglm(
  fml = reg_sp ~ conversion * as.factor(distr_topic) + nwords + rapporteur +
    as.factor(year) | deputato,
  data = speeches,
  cluster = "deputato"
)

etable(glm3_1, glm3_2, tex = FALSE)


### TABLE D.3
# Logit models of `reg_sp` on the interaction of `conversion` with `maj_tier`,
# first alone and then with controls plus `leg_term` and topic dummies.
glm4_1 <- feglm(reg_sp ~ conversion * maj_tier,
                family = binomial(link = "logit"),
                data = speeches_sw)

# `maj_tier` already enters through `conversion * maj_tier`, so it is not
# repeated among the controls (the duplicate term had no effect on the fit).
glm4_2 <- feglm(reg_sp ~ conversion * maj_tier + nwords + rapporteur +
                  gov_office + office + gov_party + local + south +
                  male + age + as.factor(leg_term) + as.factor(cap),
                family = binomial(link = "logit"),
                data = speeches_sw)

# Coefficient tests with standard errors clustered by `deputato`.
# coeftest() lives in lmtest and vcovCL() in sandwich; neither package is
# attached by this script, so both are called through their namespace.
lmtest::coeftest(glm4_2, vcov. = sandwich::vcovCL, cluster = ~ deputato)


# Topic and year terms are dropped from the printed table.
etable(glm4_1, glm4_2, tex = FALSE, drop = c("cap", "year"))


### FIGURE D.1
# Per-party estimates of the `conversion` coefficient with 95% intervals.

# Map the full party names to short labels; the first matching pattern wins
# and parties matching none of the patterns get NA.
speeches_pt <- speeches_pt %>%
  mutate(party_short = case_when(
    str_detect(party, "PARTITO DEMOCRATICO") ~ "PD",
    str_detect(party, "MOVIMENTO 5 STELLE") ~ "M5S",
    str_detect(party, "LEGA") ~ "LN",
    str_detect(party, "BERLUSCONI") ~ "FI",
    str_detect(party, "FRATELLI") ~ "FdI",
    str_detect(party, "MISTO") ~ "Misto",
    # Typed NA keeps every branch character, which dplyr < 1.1 requires.
    TRUE ~ NA_character_
  ))

# Only speeches by one of the parties labelled above enter the figure.
speeches_sub <- speeches_pt %>%
  filter(!is.na(party_short))

parties <- unique(speeches_sub$party_short)

# Fit the same conditional logit within each party and keep one row per party
# holding the `conversion` estimate, its standard error and a normal-based
# 95% interval. Built with lapply() rather than by growing a list in a loop.
results_list <- lapply(parties, function(p) {
  subdata <- filter(speeches_sub, party_short == p)

  mod <- clogit(reg_sp ~ conversion + strata(deputato), data = subdata)

  conv_est <- unname(coef(mod)["conversion"])
  conv_se <- sqrt(vcov(mod)["conversion", "conversion"])

  data.frame(
    party = p,
    estimate = conv_est,
    se = conv_se,
    lower_95CI = conv_est - 1.96 * conv_se,
    upper_95CI = conv_est + 1.96 * conv_se
  )
})
names(results_list) <- parties

results_df <- bind_rows(results_list) %>%
  arrange(desc(estimate))

# Parties are ordered on the y axis by their estimate; the dashed red line
# marks a zero effect.
ggplot(
  results_df,
  aes(x = estimate, y = reorder(party, estimate))
) +
  geom_point() +
  geom_errorbarh(aes(xmin = lower_95CI, xmax = upper_95CI), height = 0.2) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
  theme_classic(base_family = "Roboto Condensed", base_size = 14) +
  labs(
    x = "Coefficient estimate",
    y = "Party"
  )


### FIGURE D.2
# `conversion` coefficient estimated separately on speeches with
# `gov_party == 0` and `gov_party == 1`.
speeches_gov0 <- subset(speeches_pt, gov_party == 0)
speeches_gov1 <- subset(speeches_pt, gov_party == 1)

glm_gov0 <- clogit(reg_sp ~ conversion + strata(deputato), data = speeches_gov0)
glm_gov1 <- clogit(reg_sp ~ conversion + strata(deputato), data = speeches_gov1)

# broom is not attached by this script, so tidy() is called through its
# namespace instead of relying on another package re-exporting it.
tidy_gov0 <- broom::tidy(glm_gov0, conf.int = TRUE) %>%
  filter(term == "conversion") %>%
  mutate(government = 0)

tidy_gov1 <- broom::tidy(glm_gov1, conf.int = TRUE) %>%
  filter(term == "conversion") %>%
  mutate(government = 1)

subgroup_results <- bind_rows(tidy_gov0, tidy_gov1)

# One point and interval per subgroup; the dashed red line marks a zero effect.
ggplot(subgroup_results, aes(x = estimate, y = factor(government))) +
  geom_point() +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
  scale_y_discrete(labels = c("0" = "Opposition", "1" = "Government")) +
  theme_classic(base_family = "Roboto Condensed", base_size = 14) +
  labs(
    x = "Coefficient estimate",
    y = "Party"
  )

### TABLE E.1
# Within ("fixed effects") panel models indexed by `deputato`. Each outcome is
# fitted twice: on `conversion` alone, and with speech length, rapporteur
# status and year/topic dummies added.

# Outcome: guiraud
div1 <- plm(guiraud ~ conversion,
            model = "within", index = "deputato", data = speeches_compl)
div2 <- plm(guiraud ~ conversion + nwords + rapporteur +
              as.factor(year) + as.factor(cap),
            model = "within", index = "deputato", data = speeches_compl)

# Outcome: read_syll
syll1 <- plm(read_syll ~ conversion,
             model = "within", index = "deputato", data = speeches_compl)
syll2 <- plm(read_syll ~ conversion + nwords + rapporteur +
               as.factor(year) + as.factor(cap),
             model = "within", index = "deputato", data = speeches_compl)

# Outcome: read_length (fitted, but not part of the table printed below)
length1 <- plm(read_length ~ conversion,
               model = "within", index = "deputato", data = speeches_compl)
length2 <- plm(read_length ~ conversion + nwords + rapporteur +
                 as.factor(year) + as.factor(cap),
               model = "within", index = "deputato", data = speeches_compl)

# Outcome: read_gulpease
gulpease1 <- plm(read_gulpease ~ conversion,
                 model = "within", index = "deputato", data = speeches_compl)
gulpease2 <- plm(read_gulpease ~ conversion + nwords + rapporteur +
                   as.factor(year) + as.factor(cap),
                 model = "within", index = "deputato", data = speeches_compl)

# Year and topic dummies are omitted from the printed table.
stargazer(
  syll1, syll2, div1, div2, gulpease1, gulpease2,
  type = "text",
  style = "ajps",
  single.row = FALSE,
  title = "Floor rules and language complexity",
  dep.var.labels = "Language complexity",
  omit = c("cap", "year"),
  covariate.labels = c("Open-access", "Speech length", "Rapporteur")
)


### TABLE E.2
# `first_number` is modelled with within panel models indexed by `deputato`;
# `contains_first` with conditional logits stratified by `deputato`. Each is
# fitted without and with controls and year/topic dummies.
firstcount1 <- plm(first_number ~ conversion,
                   model = "within", index = "deputato", data = speeches_compl)

firstcount2 <- plm(first_number ~ conversion + nwords + rapporteur +
                     as.factor(year) + as.factor(cap),
                   model = "within", index = "deputato", data = speeches_compl)

firstdummy1 <- clogit(contains_first ~ conversion + strata(deputato),
                      data = speeches_compl)

firstdummy2 <- clogit(contains_first ~ conversion + nwords + rapporteur +
                        as.factor(year) + as.factor(cap) + strata(deputato),
                      data = speeches_compl)

# Year and topic dummies are omitted from the printed table.
stargazer(
  firstcount1, firstcount2, firstdummy1, firstdummy2,
  type = "text",
  style = "ajps",
  single.row = FALSE,
  title = "Floor rules and language complexity",
  dep.var.labels = c("Count", "Dummy"),
  omit = c("cap", "year"),
  covariate.labels = c("Open-access", "Speech length", "Rapporteur")
)










